library("igraph")
library("ggplot2")
library('Matrix')
library('pracma')
Attaching package: ‘igraph’
The following objects are masked from ‘package:stats’:
decompose, spectrum
The following object is masked from ‘package:base’:
union
Registered S3 methods overwritten by 'ggplot2':
method from
[.quosures rlang
c.quosures rlang
print.quosures rlang
Warning message:
“package ‘pracma’ was built under R version 3.6.3”
Attaching package: ‘pracma’
The following objects are masked from ‘package:Matrix’:
expm, lu, tril, triu
# Problem 1(a)
n = 1000
for(p in c(0.003, 0.004, 0.01, 0.05, 0.1)){
g = sample_gnp(n, p)
deg = degree(g)
hist = degree_distribution(g)
print(c('p:', p, mean(deg), var(deg)))
plot(hist, type="b", main=paste("Node Degree Distribution: p= ", p), lwd = 3,
xlab="Degree", ylab="Normalized Frequency",col="blue",cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
}
# Theoretical Mean
probs = c(0.003, 0.004, 0.01, 0.05, 0.1);
probs * 999
# Theoretical Var
probs * 999 * (1-probs)
[1] "p:" "0.003" "3.016" "2.86260660660661" [1] "p:" "0.004" "4.024" "4.22164564564565"
[1] "p:" "0.01" "9.752" "9.83433033033033"
[1] "p:" "0.05" "49.498" "46.5004964964965"
[1] "p:" "0.1" "100.24" "91.0194194194194"
# Problem 1(b)
n = 1000
GCC_mode <- character(5)
for(p in c(0.003, 0.004, 0.01, 0.05, 0.1)){
cnt_connected = 0
GCC_vector = character(100)
i=0
for (val in 1:100){
g = sample_gnp(n,p)
if(!is_connected(g)){ # to check if the graph is not connected
i = i+1
# Find diameter of largest component
components = clusters(g)
biggest_cluster_id = which.max(components$csize)
vert_ids = V(g)[components$membership == biggest_cluster_id]
GCC_vector[i] = diameter(induced_subgraph(g, vert_ids))
} else {
cnt_connected = cnt_connected + 1
}
}
unique_GCCs = unique(GCC_vector)
unique_GCCs = unique_GCCs[unique_GCCs != ""]
GGC_mode = unique_GCCs[which.max(tabulate(match(GCC_vector, unique_GCCs)))]
print(paste("For p = ", p," Connected = ",cnt_connected," Mode GCC: ",GGC_mode))
print("----------------------------")
}
# Prob connected vs p
x = c()
y = c()
for(p in (4:10)*0.001){
cnt_connected = 0
for (val in 1:100){
g = sample_gnp(n,p)
if(is_connected(g)){ # to check if the graph is not connected
cnt_connected = cnt_connected + 1
}
}
x = c(x,p)
y = c(y,cnt_connected/100)
}
# Plot Prob connected vs p
plot(x, y, type = 'b', main="Probability Connected vs. p", xlab="p", ylab="P_connected",
cex.main = 2, cex.lab =1.5, cex.axis = 1.3,col = "blue")
[1] "For p = 0.003 Connected = 0 Mode GCC: 14" [1] "----------------------------" [1] "For p = 0.004 Connected = 0 Mode GCC: 11" [1] "----------------------------" [1] "For p = 0.01 Connected = 95 Mode GCC: 6" [1] "----------------------------" [1] "For p = 0.05 Connected = 100 Mode GCC: NA" [1] "----------------------------" [1] "For p = 0.1 Connected = 100 Mode GCC: NA" [1] "----------------------------"
# Problem 1(c)
n = 1000
x = c()
y = c()
x_avg = c()
y_avg = c()
for(p in (1:33)*0.0003){
# print(p)
y_p = c()
for(i in 1:100){
g = sample_gnp(n, p)
if(!is_connected(g)){ # to check if the graph is not connected
# Find size of largest CC
components = clusters(g)
size = max(components$csize)
# Update vectors with largest CC
x = c(x, p)
y = c(y, size/1000)
y_p = c(y_p, size/1000)
}
}
if(length(y_p)!=0){
x_avg = c(x_avg, p)
y_avg = c(y_avg, mean(y_p))
}
}
# Plot Results
plot(x, y, main="GCC Fraction vs. p", xlab="p", ylab="GCC Fraction",
cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
lines(x_avg, y_avg,col="blue",lwd=2)
# Zoom in on region .0001 .004
x = c()
y = c()
x_avg = c()
y_avg = c()
for(p in (1:40)*0.0001){
# print(p)
y_p = c()
for(i in 1:100){
g = sample_gnp(n, p)
if(!is_connected(g)){ # to check if the graph is not connected
# Find size of largest CC
components = clusters(g)
size = max(components$csize)
# Update vectors with largest CC
x = c(x, p)
y = c(y, size/1000)
y_p = c(y_p, size/1000)
}
}
if(length(y_p)!=0){
x_avg = c(x_avg, p)
y_avg = c(y_avg, mean(y_p))
}
}
# Plot Results
plot(x, y, main="GCC Fraction vs. p Zoomed", xlab="p", ylab="GCC Fraction",
cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
lines(x_avg, y_avg,col="blue",lwd=2)
#Theoretical Emergence and 99% GCC
print(paste("Transition Point 1 (Emergence): ", 1/n))
print(paste("Transition Point 2 (Dominance): ", log(n)/n))
[1] "Transition Point 1 (Emergence): 0.001" [1] "Transition Point 2 (Dominance): 0.00690775527898214"
# Problem 1(d)(i)
constant = 0.5
x = c()
y = c()
x_avg = c()
y_avg = c()
for(n in 100:10000){
p = constant/n
x = c(x, n)
y_p = c()
for(i in 1:10){
g = sample_gnp(n, p)
if(!is_connected(g)){ # to check if the graph is not connected
# Find size of largest CC
components = clusters(g)
size = max(components$csize)
# Update vector with largest CC
y_p = c(y_p, size)
}
}
y_avg = c(y_avg, mean(y_p))
}
plot(x, y_avg, main="Mean GCC Size c = 0.5", xlab="n", ylab="GCC Size",
cex=.2, cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
xlog = log(x)
ylog = log(y_avg)
plot(xlog, ylog, main="Log LogMean GCC Size c = 0.5", xlab="n", ylab="GCC Size",
cex=.2, cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
lm(ylog~xlog)
Call:
lm(formula = ylog ~ xlog)
Coefficients:
(Intercept) xlog
0.9624 0.2119
# Problem 1(d)(ii)
constant = 1
x = c()
y = c()
x_avg = c()
y_avg = c()
for(n in 100:10000){
p = constant/n
x = c(x, n)
y_p = c()
for(i in 1:10){
g = sample_gnp(n, p)
if(!is_connected(g)){ # to check if the graph is not connected
# Find size of largest CC
components = clusters(g)
size = max(components$csize)
# Update vector with largest CC
y_p = c(y_p, size)
}
}
y_avg = c(y_avg, mean(y_p))
}
plot(x, y_avg, main="Mean GCC Size c = 1", xlab="n", ylab="GCC Size",
cex=.2, cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
xlog = log(x)
ylog = log(y_avg)
plot(xlog, ylog, main="Log LogMean GCC Size c = 1", xlab="n", ylab="GCC Size",
cex=.2, cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
lm(ylog~xlog)
Call:
lm(formula = ylog ~ xlog)
Coefficients:
(Intercept) xlog
-0.1096 0.6693
# Problem 1(d)(iii)
constant = 1.1
x = c()
y = c()
y_avg_1 = c()
for(n in (10:1000)*10){
p = constant/n
x = c(x, n)
y_p = c()
for(i in 1:10){
g = sample_gnp(n, p)
if(!is_connected(g)){ # to check if the graph is not connected
# Find size of largest CC
components = clusters(g)
size = max(components$csize)
# Update vector with largest CC
y_p = c(y_p, size)
}
}
y_avg_1 = c(y_avg_1, mean(y_p))
}
length(y_avg_1)
# Problem 1(d)(iii): Plot
plot(x, y_avg_1, main="Mean GCC Size vs n", xlab="n", ylab="GCC Size",
cex=.2, cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
points(x, y_avg_2, cex=.2,col="blue")
points(x, y_avg_3, cex=.2,col="red")
legend("topleft", legend=c("c=1.3", "c=1.2", "c=1.3"),
col=c("red", "blue","black"), lty=1:2, cex=0.8)
# Problem 2(a)
graph <- sample_pa(1000,m=5,directed=F)
plot(graph, vertex.size=5, vertex.label=NA)
is.connected(graph)
# Problem 2(b)
community <- cluster_fast_greedy(graph)
modularity(community)
length(community)
plot(community, graph,vertex.label=NA, vertex.size=3)
# Problem 2(c)
graph_larger <- sample_pa(10000,m=5,directed=F)
community_larger <- cluster_fast_greedy(graph_larger)
modularity(community_larger)
length(community_larger)
plot(community_larger, graph_larger,vertex.label=NA, vertex.size=3)
# Problem 2(d): 1000
# Linear regression uses the lm() function to create a regression model
degree <- degree.distribution(graph)
log_degree <- log(degree)
remove <- is.infinite(log_degree)
log_degree <- log(degree[!remove])
xlog <- 1:length(degree)
xlog <- log(xlog[!remove])
plot(degree,log='xy', cex=.5,col="blue",xlab = "degree",ylab = "frequency",main="Degree distribution m=2 in log-log Scale")
print("Slope estimation using linear regression for n=1000")
lm(log_degree~xlog)
# Problem 2(d): 10000
# Linear regression uses the lm() function to create a regression model
degree_larger <- degree.distribution(graph_larger)
log_degree_larger <- log(degree_larger)
remove_larger <- is.infinite(log_degree_larger)
log_degree_larger <- log(degree_larger[!remove_larger])
xlog_larger <- 1:length(degree_larger)
xlog_larger <- log(xlog_larger[!remove_larger])
points(degree_larger,cex=.5,col="red")
print("Slope estimation using linear regression for n=10000")
lm(log_degree_larger~xlog_larger)
legend("topleft", legend=c("n=1000", "n=10000"),
col=c("blue","red"), lty=1:2, cex=0.8)
Warning message in xy.coords(x, y, xlabel, ylabel, log): “68 y values <= 0 omitted from logarithmic plot”
[1] "Slope estimation using linear regression for n=1000"
Call:
lm(formula = log_degree ~ xlog)
Coefficients:
(Intercept) xlog
1.623 -2.127
[1] "Slope estimation using linear regression for n=10000"
Call:
lm(formula = log_degree_larger ~ xlog_larger)
Coefficients:
(Intercept) xlog_larger
1.388 -2.198
# Problem 2(e)
sample_count_degree = function(graph) {
# sample vcount nodes, then sample their 1 neighbor for each, and get the degree
degrees = array(0, vcount(graph))
for (i in 1:vcount(graph)) {
node <- sample(vcount(graph), 1)
neighbors <- neighbors(graph, node, mode="total")
if (length(neighbors) > 0) {
neighbor = sample(length(neighbors), 1)
degrees[i] <- degree(graph, neighbors[neighbor])
}
}
# count the frequency of each degree
freq <- sort(degrees)
df <- data.frame(degree=c(NA), freq=c(NA))
df <- df[-1, ]
curDegree <- freq[1]
count <- 1
for (i in 2:length(freq)) {
if (freq[i] != curDegree) {
df[nrow(df)+1, ] <- c(curDegree, count)
curDegree <- freq[i]
count <- 1
} else {
count <- count + 1
}
}
df[nrow(df)+1, ] <- c(curDegree, count)
return (df)
}
random_degree_distribution_orig = function(df, n) {
# plot original degree distribution histogram
text_size <- 0
dist <- 0
if (n == 1000) {
text_size <- 4
dist <- 5
} else {
text_size <- 2
dist <- 30
}
ggplot(df, aes(x=degree, y=freq))+
geom_bar(stat = "identity", fill="firebrick1")+
geom_text(aes(x=degree,y=freq+dist,label=freq),color="firebrick1",size=text_size,show.legend = T)+
labs(title=paste("Random Degree Distribution of the Network (n=",n,", m=2)",collapse=""), x="Degree", y="Frequency")+
theme(plot.title = element_text(hjust = 0.5))
}
random_degree_distribution_log = function(df, n) {
# calculate P for each degree
degree <- log2(df["degree"])
distr <- log2(df["freq"]/n)
dataframe <- data.frame(degree, distr)
print(dataframe)
plot(dataframe, type="o", col="firebrick1", main=paste("Random Degree Distribution in Log Scale (n=",n,", m=2)",collapse=""),xlab="log(degree)",ylab="log(possibility)")
# if (n == 1000) {
# lines(seq(1,4.5,0.035), seq(-2,-8,-0.06),col="dodgerblue1",lty=2)
# } else {
# lines(seq(1,5.5,0.045), seq(-2.2,-9.4,-0.072),col="dodgerblue1",lty=2)
# }
lm(distr[["freq"]]~degree[["degree"]])
}
# plot n=1000
print("=================================== n = 1000 =====================================")
df <- sample_count_degree(graph)
random_degree_distribution_orig(df, 1000)
random_degree_distribution_log(df, 1000)
print("==================================================================================")
# plot n=10000
print("=================================== n = 10000 ====================================")
df <- sample_count_degree(graph_larger)
random_degree_distribution_orig(df, 10000)
random_degree_distribution_log(df, 10000)
print("==================================================================================")
[1] "=================================== n = 1000 ====================================="
degree freq
1 2.321928 -3.210897
2 2.584963 -3.307573
3 2.807355 -3.643856
4 3.000000 -3.756331
5 3.169925 -4.158429
6 3.321928 -4.132894
7 3.459432 -4.836501
8 3.584963 -4.608232
9 3.700440 -4.643856
10 3.807355 -5.321928
11 3.906891 -5.795859
12 4.000000 -5.210897
13 4.087463 -5.058894
14 4.169925 -6.965784
15 4.247928 -6.506353
16 4.321928 -6.158429
17 4.392317 -5.643856
18 4.459432 -5.878321
19 4.523562 -6.058894
20 4.584963 -6.506353
21 4.643856 -6.795859
22 4.700440 -8.965784
23 4.754888 -5.717857
24 4.807355 -6.965784
25 4.857981 -6.058894
26 4.906891 -7.380822
27 5.044394 -7.643856
28 5.087463 -8.965784
29 5.129283 -8.380822
30 5.169925 -6.506353
31 5.209453 -9.965784
32 5.247928 -7.965784
33 5.321928 -7.158429
34 5.357552 -8.380822
35 5.426265 -9.965784
36 5.491853 -7.158429
37 5.554589 -8.965784
38 5.584963 -8.965784
39 5.614710 -7.380822
40 5.643856 -7.380822
41 5.754888 -7.380822
42 5.807355 -6.643856
43 5.977280 -6.265345
44 6.000000 -7.380822
45 6.189825 -6.506353
46 6.228819 -6.795859
47 6.321928 -7.158429
48 6.442943 -6.265345
49 6.614710 -6.380822
50 6.882643 -7.380822
Call:
lm(formula = distr[["freq"]] ~ degree[["degree"]])
Coefficients:
(Intercept) degree[["degree"]]
-1.447 -1.057
[1] "==================================================================================" [1] "=================================== n = 10000 ===================================="
degree freq 1 2.321928 -3.194955 2 2.584963 -3.469130 3 2.807355 -3.537843 4 3.000000 -3.880445 5 3.169925 -4.197600 6 3.321928 -4.192315 7 3.459432 -4.405069 8 3.584963 -4.615287 9 3.700440 -4.768076 10 3.807355 -5.194955 11 3.906891 -5.356975 12 4.000000 -5.310432 13 4.087463 -6.020926 14 4.169925 -5.820107 15 4.247928 -5.878321 16 4.321928 -6.546245 17 4.392317 -6.158429 18 4.459432 -6.210897 19 4.523562 -6.811979 20 4.584963 -6.506353 21 4.643856 -6.658356 22 4.700440 -6.764150 23 4.754888 -7.020926 24 4.807355 -7.058894 25 4.857981 -7.117787 26 4.906891 -6.930160 27 4.954196 -7.643856 28 5.000000 -7.221623 29 5.044394 -7.158429 30 5.087463 -7.673003 31 5.129283 -7.221623 32 5.169925 -8.117787 33 5.209453 -7.702750 34 5.247928 -8.158429 35 5.285402 -8.158429 36 5.321928 -8.643856 37 5.357552 -8.587273 38 5.392317 -10.117787 39 5.426265 -7.930160 40 5.459432 -10.480357 41 5.491853 -8.380822 42 5.523562 -8.117787 43 5.554589 -8.380822 44 5.584963 -9.380822 45 5.614710 -7.643856 46 5.643856 -8.200250 47 5.672425 -8.965784 48 5.700440 -11.287712 49 5.727920 -10.480357 50 5.754888 -9.287712 51 5.781360 -10.480357 52 5.807355 -8.480357 53 5.832890 -9.287712 54 5.857981 -8.965784 55 5.882643 -8.587273 56 5.906891 -8.078259 57 5.930737 -9.039785 58 5.954196 -8.429731 59 5.977280 -8.895395 60 6.000000 -11.702750 61 6.022368 -10.117787 62 6.044394 -11.287712 63 6.066089 -9.287712 64 6.087463 -10.480357 65 6.108524 -8.643856 66 6.129283 -8.287712 67 6.149747 -9.200250 68 6.169925 -9.117787 69 6.189825 -10.702750 70 6.228819 -10.480357 71 6.285402 -9.287712 72 6.321928 -12.287712 73 6.357552 -9.380822 74 6.375039 -9.039785 75 6.392317 -10.117787 76 6.409391 -10.117787 77 6.426265 -10.117787 78 6.442943 -9.380822 79 6.523562 -9.587273 80 6.569856 -9.965784 81 6.584963 -9.965784 82 6.629357 -11.702750 83 6.643856 -9.828281 84 6.700440 -9.117787 85 6.832890 -9.702750 86 6.857981 -9.380822 87 6.882643 -8.895395 88 6.988685 -9.965784 89 7.000000 -9.587273 90 7.011227 -8.429731 91 7.076816 -8.287712 92 7.108524 -9.287712 93 7.189825 -8.895395 94 7.257388 -9.587273 95 7.266787 -8.965784 96 7.426265 -8.158429 97 7.467606 -8.702750 98 7.507795 -8.965784 99 7.515700 -8.895395 100 7.679480 -9.117787 101 7.748193 -8.828281 102 7.754888 -8.587273 103 7.845490 -8.702750 104 7.954196 -8.429731 105 7.988685 -9.117787 106 8.285402 -8.587273
Call:
lm(formula = distr[["freq"]] ~ degree[["degree"]])
Coefficients:
(Intercept) degree[["degree"]]
-1.671 -1.152
[1] "=================================================================================="
# Problem 2(f)
# iterate 1000 times, and calculate the mean degree for each age
degrees <- array(0, 1000)
for (i in 1:1000) {
g <- barabasi.game(1000,m=5,directed=F)
degrees <- degrees+degree(g)
}
degrees <- degrees/1000
age <- c(999:0)
age_degree_df <- data.frame(age, degrees)
sum(degrees)
# plot original expected degree vs age
plot(age_degree_df, type="l", col="blue", main="Expected Degree of Nodes vs Age of Nodes (n=1000, m=5)",xlab="Age",ylab="Expected Degree")
# fitting the curve
# Problem 2(h)
# generate a new network with n=1000, m=1
g <- sample_pa(1000, m=1, directed=F)
communities1 <- cluster_fast_greedy(g)
print(paste("Modularity of original network: ",modularity(communities1),collapse=""))
print(paste("Number of communities of original network: ",length(communities1),collapse=""))
plot(g, mark.groups=groups(communities1), edge.arrow.size=.5,
vertex.color="gold", vertex.size=6, vertex.frame.color="gray", vertex.label="",
main="Original Network")
# 2) use "simple.no.multiple"
degseq_graph2 <- sample_degseq(degree(g), method="simple.no.multiple")
communities3 <- cluster_fast_greedy(degseq_graph2)
print(paste("Modularity of degree sequence network (simple.no.multiple method): ",modularity(communities3),collapse=""))
print(paste("Number of communities of degree sequence network (simple.no.multiple method): ",length(communities3),collapse=""))
plot(degseq_graph2, mark.groups=groups(communities3), edge.arrow.size=.5,
vertex.color="gold", vertex.size=6, vertex.frame.color="gray", vertex.label="",
main="Degree Sequence Generated")
print(paste("simple.no.multiple method: ",is.connected(degseq_graph2)))
[1] "Modularity of original network: 0.932626821015213" [1] "Number of communities of original network: 34" [1] "Modularity of degree sequence network (simple.no.multiple method): 0.847706565424286" [1] "Number of communities of degree sequence network (simple.no.multiple method): 136"
[1] "simple.no.multiple method: FALSE"
# Problem 3(a)
# m = 1,α=1,β=−1,a=c=d=1,b=0 (defaults in R funtion)
graph = sample_pa_age(1000,m=1,pa.exp = 1,aging.exp=-1,directed=F)
hist = degree_distribution(graph)
plot(hist, type="b", main="Node Degree Distribution", lwd = 3,
xlab="Degree", ylab="Normalized Frequency",col="blue",cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
# Estimate power law exponent
log_degree = log(hist)
remove = is.infinite(log_degree)
log_degree = log(hist[!remove])
xlog = 1:length(hist)
xlog <- log(xlog[!remove])
plot(hist,log='xy',type="b", main="Log-Log Node Degree Distribution", lwd = 3,
xlab="Degree", ylab="Normalized Frequency",col="blue",cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
lm(log_degree~xlog)
Warning message in xy.coords(x, y, xlabel, ylabel, log): “3 y values <= 0 omitted from logarithmic plot”
Call:
lm(formula = log_degree ~ xlog)
Coefficients:
(Intercept) xlog
2.328 -3.451
# Problem 3(b)
community = cluster_fast_greedy(graph)
modularity(community)
length(community)
plot(community, graph,vertex.label=NA, vertex.size=3)
# Given code on CCLE
create_transition_matrix = function (g){
# WARNING: make sure your graph is connected (you might input GCC of your graph)
vs = V(g)
n = vcount(g)
adj = as_adjacency_matrix(g)
adj[diag(rowSums(adj) == 0)] = 1 # handle if the user is using the function for networks with isolated nodes by creating self-edges
z = matrix(rowSums(adj, , 1))
transition_matrix = adj / repmat(z, 1, n) # normalize to get probabilities
return(transition_matrix)
}
random_walk = function (g, num_steps, start_node, transition_matrix = NULL){
if(is.null(transition_matrix))
transition_matrix = create_transition_matrix(g)
v = start_node
for(i in 1:num_steps){
# fprintf('Step %d: %d\n', i, v) # COMMENT THIS
PMF = transition_matrix[v, ]
v = sample(1:vcount(g), 1, prob = PMF)
}
return(v)
}
# Problem 1(a)
graph <- erdos.renyi.game(1000, 0.01, directed=FALSE)
plot(graph, vertex.size=3, vertex.label=NA, edge.arrow.size=1)
transition_matrix = create_transition_matrix(graph)
# Problem 1(b)
ts = 100 # Number of steps. Diameter too large
iteration_times = 100
# Store mean and var
avg_distances = c()
var_distances = c()
transition_matrix = create_transition_matrix(graph)
# Random walk
for (t in (1:ts)){
temp_distances = c()
for (i in 1:iteration_times){
start_node = sample(vcount(graph), 1)
last_node = random_walk(graph, t, start_node, transition_matrix)
cur_distance = distances(graph, start_node, last_node)
temp_distances = c(temp_distances, cur_distance)
}
avg_distances = c(avg_distances, mean(temp_distances))
var_distances = c(var_distances, var(temp_distances))
}
plot(avg_distances, type='l', xlab = "step", ylab = "Average distance", main="Average distance vs. step t for 1000 nodes")
plot(var_distances, type='l', xlab = "step", ylab = "Variance of distance", main="Variance of distance vs. step t for 1000 nodes")
# Problem 1(c)
# Measure the degree distribution of the nodes reached at the end of the random walk.
# How does it compare to the degree distribution of graph?
step_number = 100
random_number = 100
end_nodes = rep(NA, random_number)
for (start_node in (1: random_number)) {
node = random_walk(graph, step_number, start_node, transition_matrix)
end_nodes[start_node] <- node
}
hist(degree(graph),main="Degree distribution of graph",xlab="Degree",ylab="Frequency")
hist(degree(graph, end_nodes),main="Degree distribution of the nodes reached at end of Random Walks",type = 'o', xlab="Degree",ylab="Frequency")
Warning message in plot.window(xlim, ylim, "", ...): “graphical parameter "type" is obsolete”Warning message in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...): “graphical parameter "type" is obsolete”Warning message in axis(1, ...): “graphical parameter "type" is obsolete”Warning message in axis(2, ...): “graphical parameter "type" is obsolete”
# Problem 1(d)
# Repeat 1(b) for undirected random networks with 10000 nodes.
graph <- erdos.renyi.game(10000, 0.01, directed=FALSE)
transition_matrix = create_transition_matrix(graph)
ts = 100 # Number of steps. Diameter too large
iteration_times = 100
# Store mean and var
avg_distances = c()
var_distances = c()
transition_matrix = create_transition_matrix(graph)
# Random walk
for (t in (1:ts)){
temp_distances = c()
for (i in 1:iteration_times){
start_node = sample(vcount(graph), 1)
last_node = random_walk(graph, t, start_node, transition_matrix)
cur_distance = distances(graph, start_node, last_node)
temp_distances = c(temp_distances, cur_distance)
}
avg_distances = c(avg_distances, mean(temp_distances))
var_distances = c(var_distances, var(temp_distances))
}
plot(avg_distances, type='l', xlab = "step", ylab = "Average distance", main="Average distance vs. step t for 10000 nodes")
plot(var_distances, type='l', xlab = "step", ylab = "Variance of distance", main="Variance of distance vs. step t for 10000 nodes")
# Problem 2(a)
# Generate networks using preferential attachment model
n = 1000
m = 1
g1 <- barabasi.game(n=n, m=m, directed=FALSE)
plot(g1, vertex.size=3, vertex.label=NA, edge.arrow.size=1)
# Find diameter
cur_diameter = diameter(g1, directed = FALSE)
cat(paste("\nDiameter of the graph: ", cur_diameter))
Diameter of the graph: 21
# Problem 2(b)
ts = seq(1:200) # Number of steps. Diameter too large
iteration_times = 1000
# Store mean and var
avg_distances = c()
var_distances = c()
transition_matrix = create_transition_matrix(g1)
# Random walk
for (t in ts)
{
temp_distances = c()
for (i in 1:iteration_times)
{
start_node = sample(vcount(g1), 1)
last_node = random_walk(g1, t, start_node, transition_matrix)
cur_distance = distances(g1, start_node, last_node)
temp_distances = c(temp_distances, cur_distance)
}
avg_distances = c(avg_distances, mean(temp_distances))
var_distances = c(var_distances, var(temp_distances))
}
plot(ts, avg_distances, type="o", main="Mean distances of random walk", xlab="Steps",ylab="Mean distances")
plot(ts, var_distances, type="o", main="Variances of the random walk distances", xlab="Steps",ylab="Variances")
# Problem 2(c)
# Store degrees
random_walk_degrees = c()
iteration_times = 1000
t = 200 # Number of steps (Reach steady state)
transition_matrix = create_transition_matrix(g1)
for (i in 1:iteration_times)
{
start_node = sample(vcount(g1), 1)
last_node = random_walk(g1, t, start_node, transition_matrix)
cur_degree = degree(g1, last_node)
random_walk_degrees = c(random_walk_degrees, cur_degree)
}
# Problem 2(c): Plot
h = hist(random_walk_degrees, breaks=seq(0, max(random_walk_degrees)), main ="Random Walk Histogram of degree distribution", xlab="Degree")
plot_x = tail(h$breaks, length(h$breaks) - 1) # Remove 0 degree
plot_y = h$density
plot(plot_x, plot_y, main="Degree distribution of the network", xlab="Degree", ylab="Density", type="o")
plot(plot_x, plot_y, log="xy", main="Random Walk Degree distribution of the network(log-log)", xlab="Degree", ylab="Density")
Warning message in xy.coords(x, y, xlabel, ylabel, log): “9 y values <= 0 omitted from logarithmic plot”
# Problem 2(c): Plotting
# Degree distribution of the graph
hist(degree(g1)[2:length(degree(g1))], breaks=seq(0, max(degree(g1))), main="Full Graph Histogram of the degree distribution",xlab="Degree",ylab="Frequency")
plot(degree.distribution(g1)[2:length(degree(g1))],xlim=c(-2,30), main="Degree distribution of the network",xlab="Degree",ylab="Frequency", type="o")
plot(degree.distribution(g1)[2:length(degree(g1))], log="xy", main="Full Graph Degree distribution of the network (log-log)",xlab="Degree",ylab="Frequency")
Warning message in xy.coords(x, y, xlabel, ylabel, log): “9 y values <= 0 omitted from logarithmic plot”
# Problem 2(d): n=100
n = 100
cat("\nResults with n = ", n)
m = 1
g2 = barabasi.game(n=n, m=m, directed=FALSE)
ts = seq(1:200) # Number of steps.
iteration_times = 1000
transition_matrix = create_transition_matrix(g2)
cur_connected = is.connected(g2)
cat(paste('\nIs the graph connected: ', cur_connected))
# Find diameter
cur_diameter = diameter(g2, directed=FALSE)
cat(paste("\nDiameter of GCC: ", cur_diameter))
#cur_diameter = diameter(g1, directed = FALSE)
#cat(paste("\nDiameter of the graph: ", cur_diameter))
# Store mean and var
avg_distances = c()
var_distances = c()
# Random walk
for (t in ts)
{
temp_distances = c()
for (i in 1:iteration_times)
{
start_node = sample(vcount(g2), 1)
last_node = random_walk(g2, t, start_node, transition_matrix)
cur_distance = distances(g2, start_node, last_node)
temp_distances = c(temp_distances, cur_distance)
}
avg_distances = c(avg_distances, mean(temp_distances))
var_distances = c(var_distances, var(temp_distances))
}
Results with n = 100 Is the graph connected: TRUE Diameter of GCC: 13
# Problem 2(d): n=10000
n = 10000
cat("\nResults with n = ", n)
m = 1
g3 = barabasi.game(n=n, m=m, directed=FALSE)
ts = seq(1:80) # Number of steps.
iteration_times = 1000
transition_matrix = create_transition_matrix(g3)
cur_connected = is.connected(g3)
cat(paste('\nIs the graph connected: ', cur_connected))
# Find diameter
cur_diameter = diameter(g3, directed=FALSE)
cat(paste("\nDiameter of GCC: ", cur_diameter))
# Store mean and var
avg_distances = c()
var_distances = c()
# Random walk
for (t in ts)
{
temp_distances = c()
for (i in 1:iteration_times)
{
start_node = sample(vcount(g3), 1)
last_node = random_walk(g3, t, start_node, transition_matrix)
cur_distance = distances(g3, start_node, last_node)
temp_distances = c(temp_distances, cur_distance)
}
avg_distances = c(avg_distances, mean(temp_distances))
var_distances = c(var_distances, var(temp_distances))
}
plot(ts, avg_distances, type="o", main=paste("Mean distances of random walk with n = ", n), xlab="Steps",ylab="Mean distances")
plot(ts, var_distances, type="o", main=paste("Variances of the random walk distances with n = ", n), xlab="Steps",ylab="Variances")
# Functions
create_transition_matrix = function (g)
{
# WARNING: make sure your graph is connected (you might input GCC of your graph)
vs = V(g)
n = vcount(g)
adj = as_adjacency_matrix(g)
adj[diag(rowSums(adj) == 0)] = 1 # handle if the user is using the function for networks with isolated nodes by creating self-edges
z = matrix(rowSums(adj, , 1))
transition_matrix = adj / repmat(z, 1, n) # normalize to get probabilities
return(transition_matrix)
}
random_walk_path = function (g, num_steps, start_node, transition_matrix = NULL){
if(is.null(transition_matrix))
transition_matrix = create_transition_matrix(g)
v = start_node
vs = c()
for(i in 1:num_steps){
#fprintf('Step %d: %d\n', i, v) # COMMENT THIS
PMF = transition_matrix[v, ]
v = sample(1:vcount(g), 1, prob = PMF)
vs = c(vs, v)
}
return(vs)
}
# Problem 3(a)
n = 1000
cat("\nResults with n = ", n)
m = 4
g1 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = permute(g2, sample(vcount(g2))) # permute the g2 graph
edgelist = as_edgelist(g2)
#plot(g1)
l = length(edgelist)/2
edge_tuple = c()
for (i in 1:l)
{
tuple_to_add = c(edgelist[i], edgelist[i+l])
edge_tuple = c(edge_tuple, tuple_to_add)
}
g1_add = add_edges(g1, edge_tuple)
step = 1000
time = 50
count <- rep(0, n)
for (i in 1:time)
{
start_node = sample(1:vcount(g1_add), 1)
walk_path = random_walk_path(g1_add, step, start_node)
#cat(end_node)
for (i in walk_path)
{
count[i] = count[i]+1
}
}
prob = count/step/time
plot(degree(g1_add, mode='in'), prob, xlab='degree', ylab='probability', main="Node Probability Visted vs Degree",
cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
cor(prob, degree(g1_add, mode='in'))
Results with n = 1000
# Problem 3(b): function
random_walk_teleportation_personalized_path = function (g, num_steps, start_node, teleportation = 0.0, pr = NULL, transition_matrix = NULL){
if(is.null(transition_matrix))
transition_matrix = create_transition_matrix(g)
v = start_node
vs = c()
for(i in 1:num_steps){
#fprintf('Step %d: %d\n', i, v) # COMMENT THIS
PMF = transition_matrix[v, ]
v = sample(1:vcount(g), 1, prob = PMF)
if (runif(1) < teleportation) {
if (is.null(pr))
v = sample(1:vcount(g), 1)
else
v = sample(1:vcount(g), 1, prob = pr)
}
vs = c(vs, v)
}
return(vs)
}
# Problem 3(b): Implement
n = 1000
cat("\nResults with n = ", n)
m = 4
g1 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = permute(g2, sample(vcount(g2))) # permute the g2 graph
edgelist = as_edgelist(g2)
#plot(g1)
l = length(edgelist)/2
edge_tuple = c()
for (i in 1:l)
{
tuple_to_add = c(edgelist[i], edgelist[i+l])
edge_tuple = c(edge_tuple, tuple_to_add)
}
g1_add = add_edges(g1, edge_tuple)
step = 1000
time = 50
count <- rep(0, n)
for (i in 1:time)
{
start_node = sample(1:vcount(g1_add), 1)
walk_path = random_walk_teleportation_personalized_path(g1_add, step, start_node, teleportation=0.15)
#cat(end_node)
for (i in walk_path)
{
count[i] = count[i]+1
}
}
prob = count/step/time
plot(degree(g1_add, mode='in'), prob, xlab='degree', ylab='probability', main="Node Probability Visted vs Degree \n with Teleportation alpha=0.15",
cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
cor(prob, degree(g1_add, mode='in'))
Results with n = 1000
# Problem 4(a)
n = 1000
cat("\nResults with n = ", n)
m = 4
g1 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = permute(g2, sample(vcount(g2))) # permute the g2 graph
edgelist = as_edgelist(g2)
#plot(g1)
l = length(edgelist)/2
edge_tuple = c()
for (i in 1:l)
{
tuple_to_add = c(edgelist[i], edgelist[i+l])
edge_tuple = c(edge_tuple, tuple_to_add)
}
g1_add = add_edges(g1, edge_tuple)
step = 1000
time = 50
count <- rep(0, n)
pr = page_rank(g1_add)
for (i in 1:time)
{
start_node = sample(1:vcount(g1_add), 1)
walk_path = random_walk_teleportation_personalized_path(g1_add, step, start_node, teleportation=0.15, pr=pr$vector)
#cat(end_node)
for (i in walk_path)
{
count[i] = count[i]+1
}
}
prob = count/step/time
plot(degree(g1_add, mode='in'), prob, xlab='degree', ylab='probability', main="Node Probability Visted vs Degree \n with PageRank Teleportation alpha=0.15",
cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
cor(prob, degree(g1_add, mode='in'))
Results with n = 1000
# Problem 4(b)
######################### function
random_walk_teleportation_personalized_path = function (g, num_steps, start_node, teleportation = 0.0, pr = NULL, transition_matrix = NULL){
if(is.null(transition_matrix))
transition_matrix = create_transition_matrix(g)
v = start_node
vs = c()
for(i in 1:num_steps){
#fprintf('Step %d: %d\n', i, v) # COMMENT THIS
PMF = transition_matrix[v, ]
v = sample(1:vcount(g), 1, prob = PMF)
if (runif(1) < teleportation) {
if (is.null(pr))
v = sample(1:vcount(g), 1)
else
v = sample(1:vcount(g), 1, prob = pr)
}
vs = c(vs, v)
}
return(vs)
}
###########################
n = 1000
cat("\nResults with n = ", n)
m = 4
g1 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = permute(g2, sample(vcount(g2))) # permute the g2 graph
edgelist = as_edgelist(g2)
#plot(g1)
l = length(edgelist)/2
edge_tuple = c()
for (i in 1:l)
{
tuple_to_add = c(edgelist[i], edgelist[i+l])
edge_tuple = c(edge_tuple, tuple_to_add)
}
g1_add = add_edges(g1, edge_tuple)
pr = unlist(page_rank(g1_add)$vector)
#print(unlist(pr))
n1 = order(pr)[500]
n2 = order(pr)[501]
cat('\n', degree(g1_add, c(n1,n2), mode='in'))
tele_prob = rep(0, n)
tele_prob[n1]=1/2
tele_prob[n2]=1/2
count <- rep(0, n)
time = 50
step = 1000
for (i in 1:time)
{
start_node = sample(1:vcount(g1_add), 1)
walk_path = random_walk_teleportation_personalized_path(g1_add, step, start_node, teleportation=0.15, pr=tele_prob)
#cat(end_node)
for (i in walk_path)
{
count[i] = count[i]+1
}
}
prob = count/step/time
plot(degree(g1_add, mode='in'), prob, xlab='degree', ylab='probability', main="Node Probability Visted vs Degree \n with Median PageRank Teleport Boost",
cex.main = 2, cex.lab =1.5, cex.axis = 1.3)
cor(prob, degree(g1_add, mode='in'))
Results with n = 1000 2 2
# Problem 4(c)
######################### function
random_walk_teleportation_personalized_path = function (g, num_steps, start_node, teleportation = 0.0, pr = NULL, transition_matrix = NULL){
if(is.null(transition_matrix))
transition_matrix = create_transition_matrix(g)
v = start_node
vs = c()
for(i in 1:num_steps){
#fprintf('Step %d: %d\n', i, v) # COMMENT THIS
PMF = transition_matrix[v, ]
v = sample(1:vcount(g), 1, prob = PMF)
if (runif(1) < teleportation) {
if (is.null(pr))
v = sample(1:vcount(g), 1)
else
v = sample(1:vcount(g), 1, prob = pr)
}
vs = c(vs, v)
}
return(vs)
}
###########################
n = 1000
cat("\nResults with n = ", n)
m = 4
g1 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = barabasi.game(n=n, m=m, directed = TRUE)
g2 = permute(g2, sample(vcount(g2))) # permute the g2 graph
edgelist = as_edgelist(g2)
#plot(g1)
l = length(edgelist)/2
edge_tuple = c()
for (i in 1:l)
{
tuple_to_add = c(edgelist[i], edgelist[i+l])
edge_tuple = c(edge_tuple, tuple_to_add)
}
g1_add = add_edges(g1, edge_tuple)
pr = unlist(page_rank(g1_add)$vector)
print(length(pr))
boost = 500
interest_node_num = 5
interest_node_idx = sample(1:vcount(g1_add), interest_node_num)
weight = rep(1, n)/n
weight[interest_node_idx] = weight[interest_node_idx] * boost
pr_weight = pr * weight / sum(pr * weight)
#print(unlist(pr))
n1 = order(pr)[500]
n2 = order(pr)[501]
cat('\n', degree(g1_add, c(n1,n2), mode='in'))
tele_prob = rep(0, n)
tele_prob[n1]=1/2
tele_prob[n2]=1/2
count <- rep(0, n)
time = 50
step = 1000
for (i in 1:time)
{
start_node = sample(1:vcount(g1_add), 1)
walk_path = random_walk_teleportation_personalized_path(g1_add, step, start_node, teleportation=0.15, pr=pr_weight)
#cat(end_node)
for (i in walk_path)
{
count[i] = count[i]+1
}
}
prob = count/step/time
plot(degree(g1_add, mode='in'), prob, xlab='degree', ylab='probability', main="boost = 500")
Results with n = 1000[1] 1000 2 2